Practicing “turning knobs” to get a better sense of how the different parameters work. I am simulated data set 17.
Data were simulated across a square grid of 100 X 100 cells with depth contours.
sim.dat = readRDS("coursework/simulations/sim_data/sim_dat_17.RDS")
head(sim.dat)
## year X Y eta observed eta_scaled observed_scaled depth_scaled
## 1 1 1 1 17.04969 26.029819 0.7428023 0.7252767 -1.639687
## 2 1 2 1 17.37308 30.805359 0.7568914 0.8583390 -1.572175
## 3 1 3 1 17.69647 22.991022 0.7709806 0.6406058 -1.504663
## 4 1 4 1 18.01986 9.307899 0.7850697 0.2593488 -1.437152
## 5 1 5 1 18.34325 24.644069 0.7991588 0.6866651 -1.369640
## 6 1 6 1 18.66665 24.320600 0.8132480 0.6776522 -1.302129
grid.dat = readRDS("coursework/simulations/sim_data/grid.RDS")
grid.dat$negDepth = -1 * grid.dat$depth
head(grid.dat)
## X Y depth year depth_scaled negDepth
## 1 1 1 1 1 -1.639687 -1
## 2 2 1 2 1 -1.572175 -2
## 3 3 1 3 1 -1.504663 -3
## 4 4 1 4 1 -1.437152 -4
## 5 5 1 5 1 -1.369640 -5
## 6 6 1 6 1 -1.302129 -6
There are eight (8) columns in a simulated dataset: - year: the year, being 1:6; - X: the location along the x-axis; - Y: the location along the y-axis; - eta: the simulated values; - observed: the observed value of the simulated data (i.e. observation error applied to eta); - eta_scaled: the simulated data scaled by the mean of simulated data in a reference simulated dataset; - observed_scaled: the observed data scaled by the mean of observations in a reference simulated dataset; - depth_scaled: the depth scaled by subtracting the mean and dividing by the standard deviation.
#plot grid
ggplot(subset(grid.dat, year == 1), aes(x = X, y = Y)) +
geom_raster(aes(fill = negDepth)) +
scale_fill_viridis() +
scale_x_continuous(expand = c(0, 0)) + # Remove expansion on x-axis
scale_y_continuous(expand = c(0, 0)) # Remove expansion on x-axis
#plot eta
ggplot(subset(sim.dat, year == 1), aes(x = X, y = Y)) +
geom_raster(aes(fill = eta_scaled)) +
scale_fill_viridis() +
scale_x_continuous(expand = c(0, 0)) + # Remove expansion on x-axis
scale_y_continuous(expand = c(0, 0)) # Remove expansion on x-axis
#plot observed
ggplot(subset(sim.dat, year == 1), aes(x = X, y = Y)) +
geom_raster(aes(fill = observed_scaled)) +
scale_fill_viridis() +
scale_x_continuous(expand = c(0, 0)) + # Remove expansion on x-axis
scale_y_continuous(expand = c(0, 0)) # Remove expansion on x-axis
The grid shows deeper depths in the middle along x-axis. The simulated data (eta) shows variation throughout X and Y space. The simulated data (observed) shows grainier data and less smoothing across the patches.
Using simple random sampling (SRS). Testing process for just one year. Implementing dplyr version provided in second example because this is usually how I like to code.
source("coursework/simulations/functions_sampling.R")
set.seed(245)
sample.dat.yr = sim.dat %>% filter(year == 1) %>% slice_sample(n = 100) # taking 100 random samples
# create sample (as if you were a boat sampling once in each sampled grid cell)
obsCV = 0.2 # if using "observed" set this to zero (observation error already applied)
catchabilityPars = list(mean=1,var=0.1) #related to gear selectivity (and availability to net)
sample = sampleGrid.fn(sample.dat.yr, obsCV, catchabilityPars, varName="eta_scaled")
#plot sample
ggplot(sample, aes(x = X, y = Y)) +
geom_raster(aes(fill = observation)) +
scale_fill_viridis() +
scale_x_continuous(expand = c(0, 0)) + # Remove expansion on x-axis
scale_y_continuous(expand = c(0, 0)) # Remove expansion on x-axis
## Warning: Raster pixels are placed at uneven horizontal intervals and will be shifted
## ℹ Consider using `geom_tile()` instead.
## Raster pixels are placed at uneven horizontal intervals and will be shifted
## ℹ Consider using `geom_tile()` instead.
Using SRS across all years (6 in total).
# sample from each year of survey data
sample.dat.all.yr = sim.dat %>% group_by(year) %>% slice_sample(n = 100)
samples = sampleGrid.fn(as.data.frame(sample.dat.all.yr), obsCV, catchabilityPars, varName="eta_scaled")
# plot samples over surface of mean without observation error (taken as "true")
ggplot(sim.dat, aes(x = X, y = Y)) +
geom_raster(aes(fill = eta_scaled)) +
geom_point(data = samples, aes(size = observation), pch = 21) +
scale_fill_viridis() +
scale_size_area() +
coord_cartesian(expand = FALSE) +
facet_wrap(~year)
Assuming that the area of each grid cell is 1, under simple random sampling the design-based estimate is the product of the mean of observations and the total area (1 * the number of grid cells in the domain). We then calculate the variance, standard error and 95% confidence interval.
N = 100*100
n = 100
index_db = samples %>%
group_by(year) %>%
summarize(index = mean(observation) * N,
variance = N^2 * (1-(n/N)) * (var(observation)/n)) %>%
mutate(se = sqrt(variance),
lwr = index - qt(0.975, df = n - 1) * se,
upr = index + qt(0.975, df = n - 1) * se)
# plot index with 95% CI
ggplot(index_db, aes(x = year, y = index)) +
geom_line() +
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.4) +
labs(y = "Total biomass estimate",
x = "Year")
We see that the biomass estimate increases drastically in year 4 and remains higher afterwards.
# create mesh to size of samples
mesh = make_mesh(samples, xy_cols = c("X", "Y"), type = "cutoff_search", n_knots = 50)
## cutoff = 1.00 | knots = 1029 | ↓
## cutoff = 100.00 | knots = 24 | ↑
## cutoff = 10.00 | knots = 91 | ↓
## cutoff = 31.62 | knots = 34 | ↑
## cutoff = 17.78 | knots = 46 | ↑
## cutoff = 13.34 | knots = 61 | ↓
## cutoff = 15.40 | knots = 53 | ↓
## cutoff = 16.55 | knots = 49 | ↑
## cutoff = 15.96 | knots = 50 | ✔
plot(mesh)
model.fit = sdmTMB(
formula = observation ~ 0 + as.factor(year),
data = samples,
mesh = mesh,
time = "year",
family = lognormal(),
spatial = "on", # c("on", "off")
spatiotemporal = "iid", # c("iid", "ar1", "rw", "off")
)
model.fit
## Spatiotemporal model fit by ML ['sdmTMB']
## Formula: observation ~ 0 + as.factor(year)
## Mesh: mesh (isotropic covariance)
## Time column: character
## Data: samples
## Family: lognormal(link = 'log')
##
## Conditional model:
## coef.est coef.se
## as.factor(year)1 -0.09 0.09
## as.factor(year)2 -0.12 0.09
## as.factor(year)3 -0.05 0.09
## as.factor(year)4 0.27 0.09
## as.factor(year)5 0.16 0.09
## as.factor(year)6 0.13 0.09
##
## Dispersion parameter: 0.38
## Matérn range: 32.07
## Spatial SD: 0.16
## Spatiotemporal IID SD: 0.15
## ML criterion at convergence: 322.435
##
## See ?tidy.sdmTMB to extract these values as a data frame.
sanity(model.fit) # model checking
## ✔ Non-linear minimizer suggests successful convergence
## ✔ Hessian matrix is positive definite
## ✔ No extreme or very small eigenvalues detected
## ✔ No gradients with respect to fixed effects are >= 0.001
## ✔ No fixed-effect standard errors are NA
## ✔ No standard errors look unreasonably large
## ✔ No sigma parameters are < 0.01
## ✔ No sigma parameters are > 100
## ✔ Range parameter doesn't look unreasonably large
# randomized quantile residuals
samples$resids = residuals(model.fit)
hist(samples$resids) # looks ok but a bit left tailed
# qq plot
qqnorm(samples$resids)
# qqline not working?
# spatial residuals
ggplot(samples, aes(x = X, y = Y, color = resids)) +
scale_color_gradient2() +
geom_point() +
coord_fixed() +
facet_wrap(~year)
# hard for the untrained eye to assess if this looks good or not (I think it looks good?)
Predict to grid of full survey domain and map predictions and each component of the predictions (fixed and random effects) to see contributions of each to the full prediction.
# predict
predictions = predict(model.fit, newdata = grid.dat, return_tmb_object = TRUE)
# visualize predictions, then how fixed and random effects contribute
plot_map = function(sim.dat, column) { ggplot(sim.dat, aes(x = X, y = Y, fill = {{ column }})) +
geom_raster() + facet_wrap(~year) + coord_fixed() }
# full prediction
plot_map(predictions$data, exp(est)) +
scale_fill_viridis_c(trans = "sqrt") +
ggtitle("Prediction (fixed effects + all random effects)")
# fixed effects only (year)
plot_map(predictions$data, exp(est_non_rf)) +
ggtitle("Prediction (fixed effects only)") +
scale_fill_viridis_c(trans = "sqrt")
# spatial random effects
plot_map(predictions$data, omega_s) +
ggtitle("Spatial random effects only") +
scale_fill_gradient2()
# spatiotemporal random effects
plot_map(predictions$data, epsilon_st) +
ggtitle("Spatiotemporal random effects only") +
scale_fill_gradient2()
# we will assume that the area of each grid cell is 1
index_mb = get_index(predictions, area = 1, bias_correct = TRUE, level = 0.95)
# desired confidence interval here is 95%
# plot index with 95% CI
ggplot(index_mb, aes(x = year, y = est)) +
geom_line() +
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.4) +
labs(y = "Total biomass estimate",
x = "Year")
Similar pattern to the design-based but let’s compare them together.
head(index_db)
## # A tibble: 6 × 6
## year index variance se lwr upr
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 9858. 153010. 391. 9082. 10634.
## 2 2 9318. 156145. 395. 8534. 10102.
## 3 3 10185. 223676. 473. 9247. 11123.
## 4 4 13707. 276770. 526. 12663. 14751.
## 5 5 12229. 241179. 491. 11254. 13203.
## 6 6 12613. 365280. 604. 11414. 13812.
head(index_mb)
## year est lwr upr log_est se se_natural type
## 1 1 9735.740 9010.906 10518.88 9.183559 0.03947420 9672.060 index
## 2 2 9399.384 8697.939 10157.40 9.148399 0.03957118 9336.304 index
## 3 3 10255.759 9485.777 11088.24 9.235595 0.03982010 10188.013 index
## 4 4 13954.490 12902.057 15092.77 9.543557 0.04000816 13861.578 index
## 5 5 12495.444 11505.547 13570.51 9.433119 0.04211038 12408.924 index
## 6 6 12397.966 11482.309 13386.64 9.425288 0.03914608 12313.304 index
index_db.b = index_db %>% select(year, index, lwr, upr) %>%
mutate(type = "Design")
index_mb.b = index_mb %>% select(year, est, lwr, upr) %>%
rename(index = est) %>%
mutate(type = "Model")
index_real = sim.dat %>% group_by(year) %>%
summarize(index.eta = sum(eta_scaled))
index.all = rbind(index_db.b, index_mb.b)
# plot both together
ggplot() +
geom_line(data = index.all, aes(x = year, y = index, color = type, fill = type)) +
geom_ribbon(data = index.all, aes(x = year, y = index, color = type, fill = type, ymin = lwr, ymax = upr),
alpha = 0.3) +
geom_point(data = index_real, aes(x = year, y = index.eta), size = 3, color = "black") +
labs(y = "Total biomass estimate",
x = "Year")
## Warning in geom_line(data = index.all, aes(x = year, y = index, color = type, :
## Ignoring unknown aesthetics: fill
# looks pretty similar but see more differences from year 4-6
Sampling 100 units in the 100x100 is the equivalent of sampling 1% of the entire grid. Let’s try 5% and 10%.
(100*100)*0.05
## [1] 500
# 500
(100*100)*0.1
## [1] 1000
# 1000
# sample from each year of survey data
set.seed(245)
sample.dat.1p = sim.dat %>% group_by(year) %>% slice_sample(n = 100)
sample.dat.5p = sim.dat %>% group_by(year) %>% slice_sample(n = 500)
sample.dat.10p = sim.dat %>% group_by(year) %>% slice_sample(n = 1000)
samples.1p = sampleGrid.fn(as.data.frame(sample.dat.1p), obsCV, catchabilityPars, varName="eta_scaled")
samples.5p = sampleGrid.fn(as.data.frame(sample.dat.5p), obsCV, catchabilityPars, varName="eta_scaled")
samples.10p = sampleGrid.fn(as.data.frame(sample.dat.10p), obsCV, catchabilityPars, varName="eta_scaled")
# plot samples over surface of mean without observation error (taken as "true")
# 1%
ggplot(sim.dat, aes(x = X, y = Y)) +
geom_raster(aes(fill = eta_scaled)) +
geom_point(data = samples.1p, aes(size = observation), pch = 21) +
scale_fill_viridis() +
scale_size_area() +
coord_cartesian(expand = FALSE) +
facet_wrap(~year)
# 5%
ggplot(sim.dat, aes(x = X, y = Y)) +
geom_raster(aes(fill = eta_scaled)) +
geom_point(data = samples.5p, aes(size = observation), pch = 21) +
scale_fill_viridis() +
scale_size_area() +
coord_cartesian(expand = FALSE) +
facet_wrap(~year)
# 10%
ggplot(sim.dat, aes(x = X, y = Y)) +
geom_raster(aes(fill = eta_scaled)) +
geom_point(data = samples.10p, aes(size = observation), pch = 21) +
scale_fill_viridis() +
scale_size_area() +
coord_cartesian(expand = FALSE) +
facet_wrap(~year)
Wow looks like I might be confused? The 10% bubbles almost fill up the entire grid…but maybe that is just the plotting.
N = 100*100
n.1 = 100
n.5 = 500
n.10 = 1000
index_db.1p = samples.1p %>%
group_by(year) %>%
summarize(index = mean(observation) * N,
variance = N^2 * (1-(n.1/N)) * (var(observation)/n.1)) %>%
mutate(se = sqrt(variance),
lwr = index - qt(0.975, df = n.1 - 1) * se,
upr = index + qt(0.975, df = n.1 - 1) * se,
sample = "1perc")
index_db.5p = samples.5p %>%
group_by(year) %>%
summarize(index = mean(observation) * N,
variance = N^2 * (1-(n.5/N)) * (var(observation)/n.5)) %>%
mutate(se = sqrt(variance),
lwr = index - qt(0.975, df = n.5 - 1) * se,
upr = index + qt(0.975, df = n.5 - 1) * se,
sample = "5perc")
index_db.10p = samples.10p %>%
group_by(year) %>%
summarize(index = mean(observation) * N,
variance = N^2 * (1-(n.10/N)) * (var(observation)/n.10)) %>%
mutate(se = sqrt(variance),
lwr = index - qt(0.975, df = n.10 - 1) * se,
upr = index + qt(0.975, df = n.10 - 1) * se,
sample = "10perc")
index_db.allp = rbind(index_db.1p, index_db.5p, index_db.10p) %>%
mutate(sample = factor(sample, levels = c("1perc", "5perc", "10perc")))
# plot index with 95% CI
ggplot() +
geom_line(data = index_db.allp, aes(x = year, y = index, color = sample, fill = sample), size = 2) +
geom_ribbon(data = index_db.allp, aes(x = year, y = index, color = sample, fill = sample, ymin = lwr, ymax = upr),
alpha = 0.25) +
geom_point(data = index_real, aes(x = year, y = index.eta), size = 3, color = "black") +
labs(y = "Total biomass estimate",
x = "Year")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in geom_line(data = index_db.allp, aes(x = year, y = index, color =
## sample, : Ignoring unknown aesthetics: fill
1% and 10% follow similar trend, but the 5% misses the slight dip in Y5. 1% though misses decline in Y6. Unsurprisingly, the higher the sample size the smaller the confidence intervals around the estimate. What is interesting though is that in the 1% situation, the larger confidence intervals end up overlapping with the true data, but the 5 and 10% ones don’t. So in a way, the larger sample size decrease in CI shows greater “precision” but it is not?
# create mesh to size of samples
mesh.1p = make_mesh(samples.1p, xy_cols = c("X", "Y"), type = "cutoff_search", n_knots = 50)
## cutoff = 1.00 | knots = 1060 | ↓
## cutoff = 100.00 | knots = 24 | ↑
## cutoff = 10.00 | knots = 91 | ↓
## cutoff = 31.62 | knots = 32 | ↑
## cutoff = 17.78 | knots = 47 | ↑
## cutoff = 13.34 | knots = 66 | ↓
## cutoff = 15.40 | knots = 56 | ↓
## cutoff = 16.55 | knots = 53 | ↓
## cutoff = 17.15 | knots = 52 | ↓
## cutoff = 17.47 | knots = 48 | ↑
## cutoff = 17.31 | knots = 51 | ↓
## cutoff = 17.39 | knots = 51 | ↓
## cutoff = 17.43 | knots = 51 | ↓
## cutoff = 17.45 | knots = 51 | ↓
## cutoff = 17.46 | knots = 51 | ↓
## cutoff = 17.46 | knots = 51 | ↓
## cutoff = 17.46 | knots = 51 | ↓
## cutoff = 17.46 | knots = 51 | ↓
## cutoff = 17.46 | knots = 48 | ↑
## cutoff = 17.46 | knots = 48 | ↑
## cutoff = 17.46 | knots = 48
plot(mesh.1p)
mesh.5p = make_mesh(samples.5p, xy_cols = c("X", "Y"), type = "cutoff_search", n_knots = 75)
## cutoff = 1.00 | knots = 3499 | ↓
## cutoff = 100.00 | knots = 24 | ↑
## cutoff = 10.00 | knots = 101 | ↓
## cutoff = 31.62 | knots = 37 | ↑
## cutoff = 17.78 | knots = 52 | ↑
## cutoff = 13.34 | knots = 67 | ↑
## cutoff = 11.55 | knots = 79 | ↓
## cutoff = 12.41 | knots = 72 | ↑
## cutoff = 11.97 | knots = 78 | ↓
## cutoff = 12.19 | knots = 73 | ↑
## cutoff = 12.08 | knots = 77 | ↓
## cutoff = 12.13 | knots = 75 | ✔
plot(mesh.5p)
mesh.10p = make_mesh(samples.10p, xy_cols = c("X", "Y"), type = "cutoff_search", n_knots = 100)
## cutoff = 1.00 | knots = 5431 | ↓
## cutoff = 100.00 | knots = 24 | ↑
## cutoff = 10.00 | knots = 101 | ↓
## cutoff = 31.62 | knots = 34 | ↑
## cutoff = 17.78 | knots = 46 | ↑
## cutoff = 13.34 | knots = 64 | ↑
## cutoff = 11.55 | knots = 85 | ↑
## cutoff = 10.75 | knots = 92 | ↑
## cutoff = 10.37 | knots = 96 | ↑
## cutoff = 10.18 | knots = 97 | ↑
## cutoff = 10.09 | knots = 97 | ↑
## cutoff = 10.04 | knots = 100 | ✔
plot(mesh.10p)
model.fit.1p = sdmTMB(
formula = observation ~ 0 + as.factor(year),
data = samples.1p,
mesh = mesh.1p,
time = "year",
family = lognormal(),
spatial = "on", # c("on", "off")
spatiotemporal = "iid", # c("iid", "ar1", "rw", "off")
)
model.fit.1p
## Spatiotemporal model fit by ML ['sdmTMB']
## Formula: observation ~ 0 + as.factor(year)
## Mesh: mesh.1p (isotropic covariance)
## Time column: character
## Data: samples.1p
## Family: lognormal(link = 'log')
##
## Conditional model:
## coef.est coef.se
## as.factor(year)1 0.00 0.09
## as.factor(year)2 -0.02 0.09
## as.factor(year)3 0.08 0.09
## as.factor(year)4 0.16 0.09
## as.factor(year)5 0.19 0.09
## as.factor(year)6 0.14 0.09
##
## Dispersion parameter: 0.37
## Matérn range: 32.05
## Spatial SD: 0.10
## Spatiotemporal IID SD: 0.20
## ML criterion at convergence: 337.549
##
## See ?tidy.sdmTMB to extract these values as a data frame.
sanity(model.fit.1p) # model checking: good
## ✔ Non-linear minimizer suggests successful convergence
## ✔ Hessian matrix is positive definite
## ✔ No extreme or very small eigenvalues detected
## ✔ No gradients with respect to fixed effects are >= 0.001
## ✔ No fixed-effect standard errors are NA
## ✔ No standard errors look unreasonably large
## ✔ No sigma parameters are < 0.01
## ✔ No sigma parameters are > 100
## ✔ Range parameter doesn't look unreasonably large
model.fit.5p = sdmTMB(
formula = observation ~ 0 + as.factor(year),
data = samples.5p,
mesh = mesh.5p,
time = "year",
family = lognormal(),
spatial = "on", # c("on", "off")
spatiotemporal = "iid", # c("iid", "ar1", "rw", "off")
)
model.fit.5p
## Spatiotemporal model fit by ML ['sdmTMB']
## Formula: observation ~ 0 + as.factor(year)
## Mesh: mesh.5p (isotropic covariance)
## Time column: character
## Data: samples.5p
## Family: lognormal(link = 'log')
##
## Conditional model:
## coef.est coef.se
## as.factor(year)1 -0.07 0.11
## as.factor(year)2 -0.11 0.11
## as.factor(year)3 0.11 0.11
## as.factor(year)4 0.23 0.11
## as.factor(year)5 0.19 0.11
## as.factor(year)6 0.09 0.11
##
## Dispersion parameter: 0.38
## Matérn range: 42.22
## Spatial SD: 0.11
## Spatiotemporal IID SD: 0.21
## ML criterion at convergence: 1676.926
##
## See ?tidy.sdmTMB to extract these values as a data frame.
sanity(model.fit.5p) # model checking: good
## ✔ Non-linear minimizer suggests successful convergence
## ✔ Hessian matrix is positive definite
## ✔ No extreme or very small eigenvalues detected
## ✔ No gradients with respect to fixed effects are >= 0.001
## ✔ No fixed-effect standard errors are NA
## ✔ No standard errors look unreasonably large
## ✔ No sigma parameters are < 0.01
## ✔ No sigma parameters are > 100
## ✔ Range parameter doesn't look unreasonably large
model.fit.10p = sdmTMB(
formula = observation ~ 0 + as.factor(year),
data = samples.10p,
mesh = mesh.10p,
time = "year",
family = lognormal(),
spatial = "on", # c("on", "off")
spatiotemporal = "iid", # c("iid", "ar1", "rw", "off")
)
model.fit.10p
## Spatiotemporal model fit by ML ['sdmTMB']
## Formula: observation ~ 0 + as.factor(year)
## Mesh: mesh.10p (isotropic covariance)
## Time column: character
## Data: samples.10p
## Family: lognormal(link = 'log')
##
## Conditional model:
## coef.est coef.se
## as.factor(year)1 -0.05 0.11
## as.factor(year)2 -0.07 0.11
## as.factor(year)3 0.08 0.11
## as.factor(year)4 0.21 0.11
## as.factor(year)5 0.17 0.11
## as.factor(year)6 0.14 0.11
##
## Dispersion parameter: 0.38
## Matérn range: 46.79
## Spatial SD: 0.10
## Spatiotemporal IID SD: 0.21
## ML criterion at convergence: 3209.667
##
## See ?tidy.sdmTMB to extract these values as a data frame.
sanity(model.fit.10p) # model checking: good
## ✔ Non-linear minimizer suggests successful convergence
## ✔ Hessian matrix is positive definite
## ✔ No extreme or very small eigenvalues detected
## ✔ No gradients with respect to fixed effects are >= 0.001
## ✔ No fixed-effect standard errors are NA
## ✔ No standard errors look unreasonably large
## ✔ No sigma parameters are < 0.01
## ✔ No sigma parameters are > 100
## ✔ Range parameter doesn't look unreasonably large
# randomized quantile residuals
samples.1p$resids = residuals(model.fit.1p)
hist(samples.1p$resids) # looks ok but a bit left tailed
samples.5p$resids = residuals(model.fit.5p)
hist(samples.5p$resids) # looks ok but stronger tails?
samples.10p$resids = residuals(model.fit.10p)
hist(samples.10p$resids) # ooo strong left tailed
# qq plot
qqnorm(samples.1p$resids)
qqnorm(samples.5p$resids)
qqnorm(samples.10p$resids) # oh ya can see left tail issue
# spatial residuals
ggplot(samples.1p, aes(x = X, y = Y, color = resids)) +
scale_color_gradient2() +
geom_point() +
coord_fixed() +
facet_wrap(~year)
ggplot(samples.5p, aes(x = X, y = Y, color = resids)) +
scale_color_gradient2() +
geom_point() +
coord_fixed() +
facet_wrap(~year)
ggplot(samples.10p, aes(x = X, y = Y, color = resids)) +
scale_color_gradient2() +
geom_point() +
coord_fixed() +
facet_wrap(~year)
# visualize predictions, then how fixed and random effects contribute
plot_map = function(sim.dat, column) { ggplot(sim.dat, aes(x = X, y = Y, fill = {{ column }})) +
geom_raster() + facet_wrap(~year) + coord_fixed() }
# predict
predictions.1p = predict(model.fit.1p, newdata = grid.dat, return_tmb_object = TRUE)
predictions.5p = predict(model.fit.5p, newdata = grid.dat, return_tmb_object = TRUE)
predictions.10p = predict(model.fit.10p, newdata = grid.dat, return_tmb_object = TRUE)
# full prediciton
plot_map(predictions.1p$data, exp(est)) +
scale_fill_viridis_c(trans = "sqrt") +
ggtitle("Prediction (fixed effects + all random effects)")
plot_map(predictions.5p$data, exp(est)) +
scale_fill_viridis_c(trans = "sqrt") +
ggtitle("Prediction (fixed effects + all random effects)")
plot_map(predictions.10p$data, exp(est)) +
scale_fill_viridis_c(trans = "sqrt") +
ggtitle("Prediction (fixed effects + all random effects)")
# fixed effects
plot_map(predictions.1p$data, exp(est_non_rf)) +
ggtitle("Prediction (fixed effects only)") +
scale_fill_viridis_c(trans = "sqrt")
plot_map(predictions.5p$data, exp(est_non_rf)) +
ggtitle("Prediction (fixed effects only)") +
scale_fill_viridis_c(trans = "sqrt")
plot_map(predictions.10p$data, exp(est_non_rf)) +
ggtitle("Prediction (fixed effects only)") +
scale_fill_viridis_c(trans = "sqrt")
# spatial random only
plot_map(predictions.1p$data, omega_s) +
ggtitle("Spatial random effects only") +
scale_fill_gradient2()
plot_map(predictions.5p$data, omega_s) +
ggtitle("Spatial random effects only") +
scale_fill_gradient2()
plot_map(predictions.10p$data, omega_s) +
ggtitle("Spatial random effects only") +
scale_fill_gradient2()
# spatiaotemporal random only
plot_map(predictions.1p$data, epsilon_st) +
ggtitle("Spatiotemporal random effects only") +
scale_fill_gradient2()
plot_map(predictions.5p$data, epsilon_st) +
ggtitle("Spatiotemporal random effects only") +
scale_fill_gradient2()
plot_map(predictions.10p$data, epsilon_st) +
ggtitle("Spatiotemporal random effects only") +
scale_fill_gradient2()
# we will assume that the area of each grid cell is 1
index_mb.1p = get_index(predictions.1p, area = 1, bias_correct = TRUE, level = 0.95) %>%
mutate(sample = "1perc")
index_mb.5p = get_index(predictions.5p, area = 1, bias_correct = TRUE, level = 0.95) %>%
mutate(sample = "5perc")
index_mb.10p = get_index(predictions.10p, area = 1, bias_correct = TRUE, level = 0.95) %>%
mutate(sample = "10perc")
index_mb.allp = rbind(index_mb.1p, index_mb.5p, index_mb.10p) %>%
mutate(sample = factor(sample, levels = c("1perc", "5perc", "10perc")))
# desired confidence interval here is 95%
# plot index with 95% CI
ggplot() +
geom_line(data = index_mb.allp, aes(x = year, y = est, color = sample, fill = sample), size = 2) +
geom_ribbon(data = index_mb.allp, aes(x = year, y = est, color = sample, fill = sample, ymin = lwr, ymax = upr),
alpha = 0.25) +
geom_point(data = index_real, aes(x = year, y = index.eta), size = 3, color = "black") +
labs(y = "Total biomass estimate",
x = "Year")
## Warning in geom_line(data = index_mb.allp, aes(x = year, y = est, color =
## sample, : Ignoring unknown aesthetics: fill
head(index_db.allp)
## # A tibble: 6 × 7
## year index variance se lwr upr sample
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <fct>
## 1 1 10280. 161777. 402. 9481. 11078. 1perc
## 2 2 10219. 191047. 437. 9352. 11087. 1perc
## 3 3 11269. 253750. 504. 10270. 12269. 1perc
## 4 4 12580. 258042. 508. 11572. 13588. 1perc
## 5 5 12419. 189240. 435. 11555. 13282. 1perc
## 6 6 12062. 213773. 462. 11145. 12980. 1perc
head(index_mb.allp)
## year est lwr upr log_est se se_natural type sample
## 1 1 10405.67 9615.994 11260.20 9.250106 0.04026771 10326.33 index 1perc
## 2 2 10325.35 9535.587 11180.53 9.242357 0.04059839 10240.77 index 1perc
## 3 3 11394.80 10557.159 12298.89 9.340912 0.03895610 11307.67 index 1perc
## 4 4 12774.62 11812.771 13814.79 9.455216 0.03993921 12673.60 index 1perc
## 5 5 12670.17 11731.789 13683.61 9.447006 0.03926002 12571.57 index 1perc
## 6 6 11960.17 11068.958 12923.14 9.389337 0.03950969 11867.65 index 1perc
index_db.allp.b = index_db.allp %>% select(year, sample, index, lwr, upr) %>%
mutate(type = "Design")
index_mb.allp.b = index_mb.allp %>% select(year, sample, est, lwr, upr) %>%
rename(index = est) %>%
mutate(type = "Model")
index.allp = rbind(index_db.allp.b, index_mb.allp.b)
head(index.allp)
## # A tibble: 6 × 6
## year sample index lwr upr type
## <int> <fct> <dbl> <dbl> <dbl> <chr>
## 1 1 1perc 10280. 9481. 11078. Design
## 2 2 1perc 10219. 9352. 11087. Design
## 3 3 1perc 11269. 10270. 12269. Design
## 4 4 1perc 12580. 11572. 13588. Design
## 5 5 1perc 12419. 11555. 13282. Design
## 6 6 1perc 12062. 11145. 12980. Design
# plot both together
ggplot() +
geom_line(data = index.allp, aes(x = year, y = index, color = type, fill = type)) +
geom_ribbon(data = index.allp, aes(x = year, y = index, color = type, fill = type, ymin = lwr, ymax = upr),
alpha = 0.3) +
geom_point(data = index_real, aes(x = year, y = index.eta), size = 3, color = "black") +
labs(y = "Total biomass estimate",
x = "Year") +
facet_grid(.~sample)
## Warning in geom_line(data = index.allp, aes(x = year, y = index, color = type,
## : Ignoring unknown aesthetics: fill
Ok main takeaway is that sampling more in the grid does not necessarily improve your estimate of abundance. Idk why. How does oversampling work? Is it the same idea as “overfitting” a model or did I not deal with the increase in sample size well?
More negative scaled depth = shallower
grid.dat %>% filter(negDepth >= -10) %>% group_by(year) %>% summarize(mds = mean(max(depth_scaled)))
## # A tibble: 6 × 2
## year mds
## <int> <dbl>
## 1 1 -1.03
## 2 2 -1.04
## 3 3 -1.04
## 4 4 -1.06
## 5 5 -1.07
## 6 6 -1.06
# -1.06
# check
ggplot(subset(grid.dat, depth_scaled >= -1.06), aes(x = X, y = Y)) +
geom_raster(aes(fill = negDepth)) +
scale_fill_viridis() +
scale_x_continuous(expand = c(0, 0)) + # Remove expansion on x-axis
scale_y_continuous(expand = c(0, 0)) # Remove expansion on x-axis
# sample from each year of survey data
set.seed(245)
sample.dat = sim.dat %>% group_by(year) %>% slice_sample(n = 100)
sample.dat.no.s = sim.dat %>% filter(depth_scaled >= -1.06) %>%
group_by(year) %>% slice_sample(n = 100)
samples = sampleGrid.fn(as.data.frame(sample.dat), obsCV, catchabilityPars, varName="eta_scaled")
samples.no.s = sampleGrid.fn(as.data.frame(sample.dat.no.s), obsCV, catchabilityPars, varName="eta_scaled")
# plot samples over surface of mean without observation error (taken as "true")
# all
ggplot(sim.dat, aes(x = X, y = Y)) +
geom_raster(aes(fill = eta_scaled)) +
geom_point(data = samples, aes(size = observation), pch = 21) +
scale_fill_viridis() +
scale_size_area() +
coord_cartesian(expand = FALSE) +
facet_wrap(~year)
# no shallow
ggplot(sim.dat, aes(x = X, y = Y)) +
geom_raster(aes(fill = eta_scaled)) +
geom_point(data = samples.no.s, aes(size = observation), pch = 21) +
scale_fill_viridis() +
scale_size_area() +
coord_cartesian(expand = FALSE) +
facet_wrap(~year)
Can see no sampling in shallow regions (far left and right side for this grid).
N = 100*100
n = 100
index_db = samples %>%
group_by(year) %>%
summarize(index = mean(observation) * N,
variance = N^2 * (1-(n/N)) * (var(observation)/n)) %>%
mutate(se = sqrt(variance),
lwr = index - qt(0.975, df = n - 1) * se,
upr = index + qt(0.975, df = n - 1) * se,
sample = "all")
index_db.no.s = samples.no.s %>%
group_by(year) %>%
summarize(index = mean(observation) * N,
variance = N^2 * (1-(n/N)) * (var(observation)/n)) %>%
mutate(se = sqrt(variance),
lwr = index - qt(0.975, df = n - 1) * se,
upr = index + qt(0.975, df = n - 1) * se,
sample = "no shallow")
index_db.alld = rbind(index_db, index_db.no.s)
# plot index with 95% CI
ggplot() +
geom_line(data = index_db.alld, aes(x = year, y = index, color = sample, fill = sample), size = 2) +
geom_ribbon(data = index_db.alld, aes(x = year, y = index, color = sample, fill = sample, ymin = lwr, ymax = upr),
alpha = 0.25) +
geom_point(data = index_real, aes(x = year, y = index.eta), size = 3, color = "black") +
labs(y = "Total biomass estimate",
x = "Year")
## Warning in geom_line(data = index_db.alld, aes(x = year, y = index, color =
## sample, : Ignoring unknown aesthetics: fill
It is better in some parts (towards Y4-6), so perhaps that is a spatial thing occurring …have more issues with no shallow in the earlier years.
# create mesh to size of samples
mesh = make_mesh(samples, xy_cols = c("X", "Y"), type = "cutoff_search", n_knots = 50)
## cutoff = 1.00 | knots = 1060 | ↓
## cutoff = 100.00 | knots = 24 | ↑
## cutoff = 10.00 | knots = 91 | ↓
## cutoff = 31.62 | knots = 32 | ↑
## cutoff = 17.78 | knots = 47 | ↑
## cutoff = 13.34 | knots = 66 | ↓
## cutoff = 15.40 | knots = 56 | ↓
## cutoff = 16.55 | knots = 53 | ↓
## cutoff = 17.15 | knots = 52 | ↓
## cutoff = 17.47 | knots = 48 | ↑
## cutoff = 17.31 | knots = 51 | ↓
## cutoff = 17.39 | knots = 51 | ↓
## cutoff = 17.43 | knots = 51 | ↓
## cutoff = 17.45 | knots = 51 | ↓
## cutoff = 17.46 | knots = 51 | ↓
## cutoff = 17.46 | knots = 51 | ↓
## cutoff = 17.46 | knots = 51 | ↓
## cutoff = 17.46 | knots = 51 | ↓
## cutoff = 17.46 | knots = 48 | ↑
## cutoff = 17.46 | knots = 48 | ↑
## cutoff = 17.46 | knots = 48
plot(mesh)
mesh.no.s = make_mesh(samples.no.s, xy_cols = c("X", "Y"), type = "cutoff_search", n_knots = 75)
## cutoff = 1.00 | knots = 1022 | ↓
## cutoff = 100.00 | knots = 31 | ↑
## cutoff = 10.00 | knots = 78 | ↓
## cutoff = 31.62 | knots = 34 | ↑
## cutoff = 17.78 | knots = 43 | ↑
## cutoff = 13.34 | knots = 58 | ↑
## cutoff = 11.55 | knots = 68 | ↑
## cutoff = 10.75 | knots = 72 | ↑
## cutoff = 10.37 | knots = 73 | ↑
## cutoff = 10.18 | knots = 72 | ↑
## cutoff = 10.09 | knots = 72 | ↑
## cutoff = 10.04 | knots = 74 | ↑
## cutoff = 10.02 | knots = 74 | ↑
## cutoff = 10.01 | knots = 74 | ↑
## cutoff = 10.01 | knots = 74 | ↑
## cutoff = 10.00 | knots = 74 | ↑
## cutoff = 10.00 | knots = 74 | ↑
## cutoff = 10.00 | knots = 74 | ↑
## cutoff = 10.00 | knots = 74 | ↑
## cutoff = 10.00 | knots = 74
plot(mesh.no.s)
model.fit = sdmTMB(
formula = observation ~ 0 + as.factor(year),
data = samples,
mesh = mesh,
time = "year",
family = lognormal(),
spatial = "on", # c("on", "off")
spatiotemporal = "iid", # c("iid", "ar1", "rw", "off")
)
model.fit
## Spatiotemporal model fit by ML ['sdmTMB']
## Formula: observation ~ 0 + as.factor(year)
## Mesh: mesh (isotropic covariance)
## Time column: character
## Data: samples
## Family: lognormal(link = 'log')
##
## Conditional model:
## coef.est coef.se
## as.factor(year)1 -0.06 0.08
## as.factor(year)2 -0.09 0.08
## as.factor(year)3 0.12 0.08
## as.factor(year)4 0.26 0.08
## as.factor(year)5 0.21 0.08
## as.factor(year)6 0.17 0.08
##
## Dispersion parameter: 0.37
## Matérn range: 23.56
## Spatial SD: 0.13
## Spatiotemporal IID SD: 0.22
## ML criterion at convergence: 332.958
##
## See ?tidy.sdmTMB to extract these values as a data frame.
sanity(model.fit) # model checking: good
## ✔ Non-linear minimizer suggests successful convergence
## ✔ Hessian matrix is positive definite
## ✔ No extreme or very small eigenvalues detected
## ✔ No gradients with respect to fixed effects are >= 0.001
## ✔ No fixed-effect standard errors are NA
## ✔ No standard errors look unreasonably large
## ✔ No sigma parameters are < 0.01
## ✔ No sigma parameters are > 100
## ✔ Range parameter doesn't look unreasonably large
model.fit.no.s = sdmTMB(
formula = observation ~ 0 + as.factor(year),
data = samples.no.s,
mesh = mesh.no.s,
time = "year",
family = lognormal(),
spatial = "on", # c("on", "off")
spatiotemporal = "iid", # c("iid", "ar1", "rw", "off")
)
model.fit.no.s
## Spatiotemporal model fit by ML ['sdmTMB']
## Formula: observation ~ 0 + as.factor(year)
## Mesh: mesh.no.s (isotropic covariance)
## Time column: character
## Data: samples.no.s
## Family: lognormal(link = 'log')
##
## Conditional model:
## coef.est coef.se
## as.factor(year)1 0.08 0.09
## as.factor(year)2 0.00 0.09
## as.factor(year)3 0.14 0.09
## as.factor(year)4 0.26 0.09
## as.factor(year)5 0.21 0.09
## as.factor(year)6 0.08 0.09
##
## Dispersion parameter: 0.39
## Matérn range: 39.41
## Spatial SD: 0.13
## Spatiotemporal IID SD: 0.13
## ML criterion at convergence: 358.812
##
## See ?tidy.sdmTMB to extract these values as a data frame.
sanity(model.fit.no.s) # model checking: good
## ✔ Non-linear minimizer suggests successful convergence
## ✔ Hessian matrix is positive definite
## ✔ No extreme or very small eigenvalues detected
## ✔ No gradients with respect to fixed effects are >= 0.001
## ✔ No fixed-effect standard errors are NA
## ✔ No standard errors look unreasonably large
## ✔ No sigma parameters are < 0.01
## ✔ No sigma parameters are > 100
## ✔ Range parameter doesn't look unreasonably large
# randomized quantile residuals
samples$resids = residuals(model.fit)
hist(samples$resids) # looks ok but a bit left tailed
samples.no.s$resids = residuals(model.fit.no.s)
hist(samples.no.s$resids) # looks actually a little better
# qq plot
qqnorm(samples$resids)
qqnorm(samples.no.s$resids)
# spatial residuals
ggplot(samples, aes(x = X, y = Y, color = resids)) +
scale_color_gradient2() +
geom_point() +
coord_fixed() +
facet_wrap(~year)
ggplot(samples.no.s, aes(x = X, y = Y, color = resids)) +
scale_color_gradient2() +
geom_point() +
coord_fixed() +
facet_wrap(~year)
# visualize predictions, then how fixed and random effects contribute
plot_map = function(sim.dat, column) { ggplot(sim.dat, aes(x = X, y = Y, fill = {{ column }})) +
geom_raster() + facet_wrap(~year) + coord_fixed() }
# predict
predictions = predict(model.fit, newdata = grid.dat, return_tmb_object = TRUE)
predictions.no.s = predict(model.fit.no.s, newdata = grid.dat, return_tmb_object = TRUE)
# full prediciton
plot_map(predictions$data, exp(est)) +
scale_fill_viridis_c(trans = "sqrt") +
ggtitle("Prediction (fixed effects + all random effects)")
plot_map(predictions.no.s$data, exp(est)) +
scale_fill_viridis_c(trans = "sqrt") +
ggtitle("Prediction (fixed effects + all random effects)")
# fixed effects
plot_map(predictions$data, exp(est_non_rf)) +
ggtitle("Prediction (fixed effects only)") +
scale_fill_viridis_c(trans = "sqrt")
plot_map(predictions.no.s$data, exp(est_non_rf)) +
ggtitle("Prediction (fixed effects only)") +
scale_fill_viridis_c(trans = "sqrt")
# spatial random only
plot_map(predictions$data, omega_s) +
ggtitle("Spatial random effects only") +
scale_fill_gradient2()
plot_map(predictions.no.s$data, omega_s) +
ggtitle("Spatial random effects only") +
scale_fill_gradient2()
# oh is my mesh not big enough ?
# increased mesh but still having corners cut off
# spatiaotemporal random only
plot_map(predictions$data, epsilon_st) +
ggtitle("Spatiotemporal random effects only") +
scale_fill_gradient2()
plot_map(predictions.no.s$data, epsilon_st) +
ggtitle("Spatiotemporal random effects only") +
scale_fill_gradient2()
# we will assume that the area of each grid cell is 1
index_mb = get_index(predictions, area = 1, bias_correct = TRUE, level = 0.95) %>%
mutate(sample = "all")
index_mb.no.s = get_index(predictions.no.s, area = 1, bias_correct = TRUE, level = 0.95) %>%
mutate(sample = "no shallow")
index_mb.alld = rbind(index_mb, index_mb.no.s)
# desired confidence interval here is 95%
# plot index with 95% CI
ggplot() +
geom_line(data = index_mb.alld, aes(x = year, y = est, color = sample, fill = sample), size = 2) +
geom_ribbon(data = index_mb.alld, aes(x = year, y = est, color = sample, fill = sample, ymin = lwr, ymax = upr),
alpha = 0.25) +
geom_point(data = index_real, aes(x = year, y = index.eta), size = 3, color = "black") +
labs(y = "Total biomass estimate",
x = "Year")
## Warning in geom_line(data = index_mb.alld, aes(x = year, y = est, color =
## sample, : Ignoring unknown aesthetics: fill
head(index_db.alld)
## # A tibble: 6 × 7
## year index variance se lwr upr sample
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 1 9765. 148473. 385. 9000. 10529. all
## 2 2 9293. 170693. 413. 8473. 10113. all
## 3 3 11684. 303895. 551. 10590. 12778. all
## 4 4 13346. 319999. 566. 12224. 14469. all
## 5 5 12423. 184141. 429. 11572. 13275. all
## 6 6 12564. 314422. 561. 11451. 13677. all
head(index_mb.alld)
## year est lwr upr log_est se se_natural type sample
## 1 1 9660.373 8923.775 10457.77 9.175787 0.04046664 9583.573 index all
## 2 2 9392.298 8679.061 10164.15 9.147645 0.04029490 9312.828 index all
## 3 3 11789.339 10923.325 12724.01 9.374951 0.03892682 11694.318 index all
## 4 4 13466.033 12457.756 14555.92 9.507926 0.03970840 13354.734 index all
## 5 5 12715.550 11775.592 13730.54 9.450581 0.03918271 12611.376 index all
## 6 6 12150.301 11248.871 13123.97 9.405109 0.03933040 12051.655 index all
index_db.alld.b = index_db.alld %>% select(year, sample, index, lwr, upr) %>%
mutate(type = "Design")
index_mb.alld.b = index_mb.alld %>% select(year, sample, est, lwr, upr) %>%
rename(index = est) %>%
mutate(type = "Model")
index.alld = rbind(index_db.alld.b, index_mb.alld.b)
head(index.alld)
## # A tibble: 6 × 6
## year sample index lwr upr type
## <int> <chr> <dbl> <dbl> <dbl> <chr>
## 1 1 all 9765. 9000. 10529. Design
## 2 2 all 9293. 8473. 10113. Design
## 3 3 all 11684. 10590. 12778. Design
## 4 4 all 13346. 12224. 14469. Design
## 5 5 all 12423. 11572. 13275. Design
## 6 6 all 12564. 11451. 13677. Design
# plot both together
ggplot() +
geom_line(data = index.alld, aes(x = year, y = index, color = type, fill = type)) +
geom_ribbon(data = index.alld, aes(x = year, y = index, color = type, fill = type, ymin = lwr, ymax = upr),
alpha = 0.3) +
geom_point(data = index_real, aes(x = year, y = index.eta), size = 3, color = "black") +
labs(y = "Total biomass estimate",
x = "Year") +
facet_grid(.~sample)
## Warning in geom_line(data = index.alld, aes(x = year, y = index, color = type,
## : Ignoring unknown aesthetics: fill
Similar assessment between both of the models which is they do better picking up the downward turn towards later part of time series and do not do well in the earlier part of time series (well overestimated).
From year 1, we think we need to split up top into 2 strata (3 strata total)
head(sim.dat)
## year X Y eta observed eta_scaled observed_scaled depth_scaled
## 1 1 1 1 17.04969 26.029819 0.7428023 0.7252767 -1.639687
## 2 1 2 1 17.37308 30.805359 0.7568914 0.8583390 -1.572175
## 3 1 3 1 17.69647 22.991022 0.7709806 0.6406058 -1.504663
## 4 1 4 1 18.01986 9.307899 0.7850697 0.2593488 -1.437152
## 5 1 5 1 18.34325 24.644069 0.7991588 0.6866651 -1.369640
## 6 1 6 1 18.66665 24.320600 0.8132480 0.6776522 -1.302129
# based off Year 1, more obs in Y50-100
test = sim.dat %>%
mutate(Stratum = ifelse(X <= 49 & Y > 49, "TL",
ifelse(X > 49 & Y > 49, "TR", "B")))
ggplot(subset(test, year == 1), aes(x = X, y = Y)) +
geom_raster(aes(fill = Stratum)) +
#scale_fill_viridis() +
scale_x_continuous(expand = c(0, 0)) + # Remove expansion on x-axis
scale_y_continuous(expand = c(0, 0))
# ok it works!
sim.dat.strata = sim.dat %>%
mutate(Stratum = ifelse(X <= 49 & Y > 49, "TL",
ifelse(X > 49 & Y > 49, "TR", "B")))
head(sim.dat.strata)
## year X Y eta observed eta_scaled observed_scaled depth_scaled Stratum
## 1 1 1 1 17.04969 26.029819 0.7428023 0.7252767 -1.639687 B
## 2 1 2 1 17.37308 30.805359 0.7568914 0.8583390 -1.572175 B
## 3 1 3 1 17.69647 22.991022 0.7709806 0.6406058 -1.504663 B
## 4 1 4 1 18.01986 9.307899 0.7850697 0.2593488 -1.437152 B
## 5 1 5 1 18.34325 24.644069 0.7991588 0.6866651 -1.369640 B
## 6 1 6 1 18.66665 24.320600 0.8132480 0.6776522 -1.302129 B
Need different sampling “n” because not 100 for each stratum.
# sample from each year of survey data
set.seed(245)
sample.dat = sim.dat.strata %>% group_by(year) %>% slice_sample(n = 100)
sample.dat.TL = sim.dat.strata %>% filter(Stratum == "TL") %>%
group_by(year) %>% slice_sample(n = 25)
sample.dat.TR = sim.dat.strata %>% filter(Stratum == "TR") %>%
group_by(year) %>% slice_sample(n = 25)
sample.dat.B = sim.dat.strata %>% filter(Stratum == "B") %>%
group_by(year) %>% slice_sample(n = 50)
samples = sampleGrid.fn(as.data.frame(sample.dat), obsCV, catchabilityPars, varName="eta_scaled")
samples.TL = sampleGrid.fn(as.data.frame(sample.dat.TL), obsCV, catchabilityPars, varName="eta_scaled")
samples.TR = sampleGrid.fn(as.data.frame(sample.dat.TR), obsCV, catchabilityPars, varName="eta_scaled")
samples.B = sampleGrid.fn(as.data.frame(sample.dat.B), obsCV, catchabilityPars, varName="eta_scaled")
# plot samples over surface of mean without observation error (taken as "true")
# all
ggplot(sim.dat.strata, aes(x = X, y = Y)) +
geom_raster(aes(fill = eta_scaled)) +
geom_point(data = samples, aes(size = observation), pch = 21) +
scale_fill_viridis() +
scale_size_area() +
coord_cartesian(expand = FALSE) +
facet_wrap(~year)
# TL
ggplot(sim.dat.strata, aes(x = X, y = Y)) +
geom_raster(aes(fill = eta_scaled)) +
geom_point(data = samples.TL, aes(size = observation), pch = 21) +
scale_fill_viridis() +
scale_size_area() +
coord_cartesian(expand = FALSE) +
facet_wrap(~year)
# TR
ggplot(sim.dat.strata, aes(x = X, y = Y)) +
geom_raster(aes(fill = eta_scaled)) +
geom_point(data = samples.TR, aes(size = observation), pch = 21) +
scale_fill_viridis() +
scale_size_area() +
coord_cartesian(expand = FALSE) +
facet_wrap(~year)
# B
ggplot(sim.dat.strata, aes(x = X, y = Y)) +
geom_raster(aes(fill = eta_scaled)) +
geom_point(data = samples.B, aes(size = observation), pch = 21) +
scale_fill_viridis() +
scale_size_area() +
coord_cartesian(expand = FALSE) +
facet_wrap(~year)
Sampling occuring in respective strata…if equal sample size for each strata and for whole grid…do we get same answer?
N = 100*100
n = 100
nT = 25
nB = 50
index_db = samples %>%
group_by(year) %>%
summarize(index = mean(observation) * N,
variance = N^2 * (1-(n/N)) * (var(observation)/n)) %>%
mutate(se = sqrt(variance),
lwr = index - qt(0.975, df = n - 1) * se,
upr = index + qt(0.975, df = n - 1) * se,
sample = "all")
index_db.TL = samples.TL %>%
group_by(year) %>%
summarize(index = mean(observation) * N,
variance = N^2 * (1-(nT/N)) * (var(observation)/nT)) %>%
mutate(se = sqrt(variance),
lwr = index - qt(0.975, df = nT - 1) * se,
upr = index + qt(0.975, df = nT - 1) * se,
sample = "Top Left")
index_db.TR = samples.TR %>%
group_by(year) %>%
summarize(index = mean(observation) * N,
variance = N^2 * (1-(nT/N)) * (var(observation)/nT)) %>%
mutate(se = sqrt(variance),
lwr = index - qt(0.975, df = nT - 1) * se,
upr = index + qt(0.975, df = nT - 1) * se,
sample = "Top Right")
index_db.B = samples.B %>%
group_by(year) %>%
summarize(index = mean(observation) * N,
variance = N^2 * (1-(nB/N)) * (var(observation)/nB)) %>%
mutate(se = sqrt(variance),
lwr = index - qt(0.975, df = nB - 1) * se,
upr = index + qt(0.975, df = nB - 1) * se,
sample = "Bottom")
index_db.strata = rbind(index_db.TL, index_db.TR, index_db.B) %>%
group_by(year) %>%
summarize(index = mean(index),
variance = mean(variance),
se = mean(se),
lwr = mean(lwr),
upr = mean(upr)) %>%
mutate(sample = "Strata")
index_db.allstr = rbind(index_db, index_db.strata)
# plot index with 95% CI
ggplot() +
geom_line(data = index_db.allstr, aes(x = year, y = index, color = sample, fill = sample), size = 2) +
geom_ribbon(data = index_db.allstr, aes(x = year, y = index, color = sample, fill = sample, ymin = lwr, ymax = upr),
alpha = 0.25) +
geom_point(data = index_real, aes(x = year, y = index.eta), size = 3, color = "black") +
labs(y = "Total biomass estimate",
x = "Year")
## Warning in geom_line(data = index_db.allstr, aes(x = year, y = index, color =
## sample, : Ignoring unknown aesthetics: fill
STOPPING HERE—I DON’T UNDERSTAND HOW TO GET ABUDNANCE FROM STRATA ********